################
#### replication code for Nathan, 2025, ``Explaining Urban Order," World Politics
#### this file replicates all analyses in the *MAIN TEXT*
#### see "SI.R" for the replication code for all analyses in the *SUPPLEMENTARY MATERIAL*
#### 
#### note: this code was written under R version 4.2.1 and uses deprecated GIS packages (maptools, sp, rgeos, raster) that will only run in older versions of R. these packages only became deprecated during the life cycle of this research project. you must install an earlier R version to run the full code.
###############

#clear workspace:
rm(list=ls())

#set working directory to replication data folder:
setwd("~/Dropbox/UMProjects_20172018/UrbanEnvironment/code/crossnational_cities/replication_data/")

# load packages:
library(foreign)
library(RCurl) 
library(AER)
library(sandwich)
library(calibrate)
library(data.table)
library(MASS)
library(lmtest)
library(car)
library(lmerTest)
library(mgcv)
library(sp)
library(maptools)
library(rgeos)
library(sf)
library(rgdal)
library(igraph)
library(raster)


#########################
### load data 
#########################

load("./data_topost/main_analysis_dat.Rdata")
	#this includes one object "main2"
	#this is the neighborhood (t-community) level dataset for the 36 sample cities
	#used in the main analyses

load("./data_topost/citylevel.Rdata")
	#this includes one object "d"
	#this is the city-level data for the largest 1001 cities in Sub-Saharan Africa for which Boeing's (2021) data
	#does not have substantial missingness
	#the file is based on Boeing (2021) and extends it with additional columns

load("./data_topost/housing.Rdata")
	#this includes two objects: base and housing
	#housing is the time series dataset on state housing consturction in 22 cities
	#base is a matrix listing the within case sign on the diretion of main effect for this subsample of cities, based on Figure SI.5

load("./data_topost/timedat.Rdata")
	#contains a series of city-specific time series datasets of V-DEM and Geddes et al democracy measures by year
	#saved in format timedat.accra, timedat.abuja, etc. There is one for each city in the sample. 

ls()

#########################
### make robust standard errors function ####
#########################

summary.lm <- function (object, correlation = FALSE, 
                        symbolic.cor = FALSE, robust=FALSE,
                        cluster=c(NULL,NULL),...) {
  # add extension for robust standard errors
  if(robust==TRUE){ 
    # save variable that are necessary to calcualte robust sd
    X <- model.matrix(object)
    u2 <- residuals(object)^2
    XDX <- 0
    
    ## One needs to calculate X'DX. But due to the fact that
    ## D is huge (NxN), it is better to do it with a cycle.
    for(i in 1:nrow(X)) {
      XDX <- XDX + u2[i]*X[i,]%*%t(X[i,])
    }
    
    # inverse(X'X)
    XX1 <- solve(t(X)%*%X,tol = 1e-100)
    
    # Sandwich Variance calculation (Bread x meat x Bread)
    varcovar <- XX1 %*% XDX %*% XX1
    
    # adjust degrees of freedom 
    dfc_r <- sqrt(nrow(X))/sqrt(nrow(X)-ncol(X))
    
    # Standard errors of the coefficient estimates are the
    # square roots of the diagonal elements
    rstdh <- dfc_r*sqrt(diag(varcovar))
  }
  # add extension for clustered standard errors
  if(!is.null(cluster)&robust==T){warning("Robust standard errors are calculated. Set robust=F to calculate clustered standard errors.")}
  if(!is.null(cluster)&robust==F){
    if(""%in%cluster){stop("No variable for clustering provided.")}
    if(length(cluster)>2){stop("The function only allows max. 2 clusters. You provided more.")}
    n_coef <- all.vars(object$call$formula)
    if(length(cluster)==1){
      dat <- na.omit(eval(object$call$data)[,c(n_coef,cluster)])
      if(nrow(dat)<nrow(object$model)){stop("Not all observation have a cluster.")}
      cluster_vector <- dat[,cluster]
      require(sandwich, quietly = TRUE)
      M <- res_length <- length(unique(cluster_vector))
      N <- length(cluster_vector)
      K <- object$rank
      dfc <- (M/(M-1))*((N-1)/(N-K))
      uj  <- na.omit(apply(estfun(object),2, function(x) tapply(x, cluster_vector, sum)));
      varcovar <- dfc*sandwich(object, meat=crossprod(uj)/N)
      rstdh <- sqrt(diag(varcovar))
    } 
    if(length(cluster)==2){
      dat_1 <- na.omit(eval(object$call$data)[,c(n_coef,cluster[1])])
      if(nrow(dat_1)<nrow(object$model)){stop("Not all observation have a cluster.")}
      dat_2 <- na.omit(eval(object$call$data)[,c(n_coef,cluster[2])])
      if(nrow(dat_2)<nrow(object$model)){stop("Not all observation have a cluster.")}
      
      dat <- na.omit(eval(object$call$data)[,c(n_coef,cluster)])
      library(sandwich,quietly = TRUE)
      cluster1 <- dat[,cluster[1]]
      cluster2 <- dat[,cluster[2]]
      cluster12 = paste(cluster1,cluster2, sep="")
      M1  <- length(unique(cluster1))
      M2  <- length(unique(cluster2))   
      M12 <- res_length <-length(unique(cluster12))
      N   <- length(cluster1)          
      K   <- object$rank             
      dfc1  <- (M1/(M1-1))*((N-1)/(N-K))  
      dfc2  <- (M2/(M2-1))*((N-1)/(N-K))  
      dfc12 <- (M12/(M12-1))*((N-1)/(N-K))  
      u1j   <- apply(estfun(object), 2, function(x) tapply(x, cluster1,  sum)) 
      u2j   <- apply(estfun(object), 2, function(x) tapply(x, cluster2,  sum)) 
      u12j  <- apply(estfun(object), 2, function(x) tapply(x, cluster12, sum)) 
      vc1   <-  dfc1*sandwich(object, meat=crossprod(u1j)/N )
      vc2   <-  dfc2*sandwich(object, meat=crossprod(u2j)/N )
      vc12  <- dfc12*sandwich(object, meat=crossprod(u12j)/N)
      varcovar <- vc1 + vc2 - vc12
      rstdh <- sqrt(diag(varcovar))
    } 
    
  }
  z <- object
  p <- z$rank
  rdf <- z$df.residual
  if (p == 0) {
    r <- z$residuals
    n <- length(r)
    w <- z$weights
    if (is.null(w)) {
      rss <- sum(r^2)
    }
    else {
      rss <- sum(w * r^2)
      r <- sqrt(w) * r
    }
    resvar <- rss/rdf
    ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
    class(ans) <- "summary.lm"
    ans$aliased <- is.na(coef(object))
    ans$residuals <- r
    ans$df <- c(0L, n, length(ans$aliased))
    ans$coefficients <- matrix(NA, 0L, 4L)
    dimnames(ans$coefficients) <- list(NULL, c("Estimate", 
                                               "Std. Error", "t value", "Pr(>|t|)"))
    ans$sigma <- sqrt(resvar)
    ans$r.squared <- ans$adj.r.squared <- 0
    return(ans)
  }
  if (is.null(z$terms)) 
    stop("invalid 'lm' object:  no 'terms' component")
  if (!inherits(object, "lm")) 
    warning("calling summary.lm(<fake-lm-object>) ...")
  Qr <- stats:::qr.lm(object)
  n <- NROW(Qr$qr)
  if (is.na(z$df.residual) || n - p != z$df.residual) 
    warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
  r <- z$residuals
  f <- z$fitted.values
  w <- z$weights
  if (is.null(w)) {
    mss <- if (attr(z$terms, "intercept")) 
      sum((f - mean(f))^2)
    else sum(f^2)
    rss <- sum(r^2)
  }
  else {
    mss <- if (attr(z$terms, "intercept")) {
      m <- sum(w * f/sum(w))
      sum(w * (f - m)^2)
    }
    else sum(w * f^2)
    rss <- sum(w * r^2)
    r <- sqrt(w) * r
  }
  resvar <- rss/rdf
  if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) * 
      1e-30) 
    warning("essentially perfect fit: summary may be unreliable")
  p1 <- 1L:p
  R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
  se <- sqrt(diag(R) * resvar)
  
  if(robust==T){se <- rstdh}
  if(!is.null(cluster)&robust==F){se <- rstdh}
  est <- z$coefficients[Qr$pivot[p1]]
  tval <- est/se
  ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
  ans$residuals <- r
  pval <- 2 * pt(abs(tval), 
                 rdf, lower.tail = FALSE)
  ans$coefficients <- cbind(est, se, tval, pval)
  dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]], 
                                     c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
  ans$aliased <- is.na(coef(object))
  ans$sigma <- sqrt(resvar)
  ans$df <- c(p, rdf, NCOL(Qr$qr))
  if (p != attr(z$terms, "intercept")) {
    df.int <- if (attr(z$terms, "intercept")) 
      1L
    else 0L
    ans$r.squared <- mss/(mss + rss)
    ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - 
                                                       df.int)/rdf)
    ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, 
                        numdf = p - df.int, dendf = rdf)
    if(robust==T|(!is.null(cluster))){
      if(!is.null(cluster)){rdf <- res_length -1}
      pos_coef <- match(names(z$coefficients)[-match("(Intercept)",
                                                     names(z$coefficients))],
                        names(z$coefficients))
      
      P_m <- matrix(z$coefficients[pos_coef])
      
      R_m <- diag(1, 
                  length(pos_coef), 
                  length(pos_coef))
      
      ans$fstatistic <- c(value = t(R_m%*%P_m)%*%
                            (solve(varcovar[pos_coef,pos_coef],tol = 1e-100))%*%
                            (R_m%*%P_m)/(p - df.int), 
                          numdf = p - df.int, dendf = rdf)
      
    }
    
  }
  else ans$r.squared <- ans$adj.r.squared <- 0
  ans$cov.unscaled <- R
  dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1, 
                                                             1)]
  if (correlation) {
    ans$correlation <- (R * resvar)/outer(se, se)
    dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
    ans$symbolic.cor <- symbolic.cor
  }
  if (!is.null(z$na.action)) 
    ans$na.action <- z$na.action
  class(ans) <- "summary.lm"
  ans
}


	
	
########################################
#### Figure 1: Examples of orientation order 
########################################

### Panels (a) and (b) of Figure 1 were made manually (as screenshots) in QGIS. There is no replication code. You can visualize the same maps just by opening the "edges" component of the .gpkg files for these two cities in any GIS software. The .gpkg files for these two cities (taken directly from Boeing's replication data) have been included as part of the uploaded data for this paper in the "data_topost/figure1_shapefiles/" sub-folder. They are: "al_ubayyid-4140.gpkg" and "kampala-4427.gpkg"

### Panels (c) and (d) of Figure 2 were made in a separate python file: see "polarhists.py" which also draws on replication data from Boeing (2022) for these two cities and adapts replication code from Boeing (2022) for making polar histograms. 

#the summary statistics for orientation order quoted in the figure captions can be seen via:

round(d$orientation_order[d$core_city=="kampala"], digits=3)
#0.003

round(d$orientation_order[d$core_city=="al_ubayyid"], digits=3)
#0.865

	
########################################
#### Figure 2: city-level orientation order across Sub-Saharan Africa 
########################################

pdf(file="./figs/figure2.pdf", height=6, width=8)

hist(d$orientation_order, breaks=35, main="Distribution of orientation order (O) \n(n=1001 cities)", xlab="orientation order (O)", ylab="frequency", col="dodgerblue2")

dev.off()




########################################
#### Figure 3: Orientation order at the neighborhood level 
########################################

city_names <-  c("abuja", "accra", "addis_ababa", "arusha", "bamako", "beira", "brikama", "buchanan", "bujumbura", "cape_town", "gaborone", "gombe", "ibadan", "johannesburg", "kampala", "kara", "khartoum", "kigali", "kinshasa", "lagos", "luanda", "lubumbashi", "malabo", "manzini", "maputo", "maseru", "monrovia", "nairobi", "nakuru", "ndola", "oyo", "pointe_noire", "port_elizabeth", "porto_novo", "touba", "windhoek")
city_labels <- c("abuja-2565", "accra-1910", "addis_ababa-5134", "arusha-4800", "bamako-1553", "beira-4521", "brikama-1458", "buchanan-1533", "bujumbura-4078", "cape_town-3268", "gaborone-3587", "gombe-2932", "ibadan-2189", "johannesburg-3673", "kampala-4427", "kara-2026", "khartoum-4335", "kigali-4172", "kinshasa-3209", "lagos-2125", "luanda-3050", "lubumbashi-3756", "malabo-2762", "manzini-4080", "maputo-4220", "maseru-3631", "monrovia-1526", "nairobi-4808", "nakuru-4729", "ndola-3859", "oyo-2191", "pointe_noire-2982", "port_elizabeth-3505", "porto_novo-2101", "touba-1473", "windhoek-3260")
city_fixed <- c("abuja-combined", "accra-1910", "addis_ababa-5134", "arusha-4800", "bamako-1553", "beira-4521", "brikama-1458", "buchanan-1533", "bujumbura-4078", "cape_town-3268", "gaborone-3587", "gombe-2932", "ibadan-2189", "johannesburg-3673", "kampala-4427", "kara-2026", "khartoum-4335", "kigali-4172", "kinshasa-3209", "lagos-2125", "luanda-3050", "lubumbashi-3756", "malabo-2762", "manzini-4080", "maputo-4220", "maseru-3631", "monrovia-1526", "nairobi-4808", "nakuru-4729", "ndola-3859", "oyo-2191", "pointe_noire-2982", "port_elizabeth-3505", "porto_novo-2101", "touba-1473", "windhoek-3260")

holder2 <- as.data.frame(matrix(NA, nrow=36, ncol=4))
for(i in 1:length(unique(main2$city_fixed))){
holder2[i,1] <- d$orientation_order[d$city==city_labels[i]]
holder2[i,2] <- city_labels[i]
holder2[i,3] <- city_names[i]
holder2[i,4] <- city_fixed[i]

 }
 
holder2a <- holder2[order(holder2$V1),] 
 
main2$city_fixed_fac <- as.factor(main2$city_fixed) 
 main2$city_fixed_fac <- factor(main2$city_fixed_fac, levels=holder2a$V4)
 city_names_order <- holder2a$V3

pdf(file="./figs/figure3.pdf", height=8, width=8) 
par(mar = c(5, 7, 4, 2))
boxplot(main2$orientation_order ~ main2$city_fixed_fac, names=city_names_order, horizontal=T, cex=0.7, pch=16, col="dodgerblue2", xlab="orientation order (O)", ylab="", las=1, cex.axis=0.7, main="Distributions of orientation order (O)\nwithin cities, by neighborhood")
points(holder2a[,1], 1:36, pch=15, col="firebrick", cex=1.2)
dev.off()



########################################
#### Figure 4: Examples of neighborhoods as ``t-communities" in Accra, Ghana 
########################################

### There is no replication code for this figure. It was made manually as a set of screenshots (one per panel) in QGIS after opening the shapefile of street edges in each of the four example t-communities from Accra on top of the OpenStreetMap base layer that comes as part of QGIS.
### The four t-communities and there orientation order values are printed here:

#panel (a)
round(main2$orientation_order[main2$city=="accra-1910" & main2$subset=="tcom_108"], digits=2)
#0.96

#panel (b)
round(main2$orientation_order[main2$city=="accra-1910" & main2$subset=="tcom_112"], digits=2)
#0.74

#panel (c)
round(main2$orientation_order[main2$city=="accra-1910" & main2$subset=="tcom_148"], digits=2)
#0.36

#panel (d)
round(main2$orientation_order[main2$city=="accra-1910" & main2$subset=="tcom_143"], digits=2)
#0.14

### In the "/data_topost/figure4_shapefiles/" sub-folder I also provide the shapefiles for these four t-communities so you can visualize the same streets in any GIS software of your choosing. 




########################################
#### Figures 5 and 6: Khartoum, example of historical bands 
########################################

#load Boeing's replication data geopackage for Khartoum: 
edges <- readOGR("./data_topost/figure5_shapefiles/khartoum-4335.gpkg", "edges")
class(edges)

### load all the urban extent files for khartoum at each time band:
### NOTE: urban extent files were made to be opened consecutively and read cumulatively. manual digitization occured sequentially. to save time, locations coded as built already in the previous time band is NOT necessarily marked as built in the new time band in cases where the new time band was digitized manually from an historical map. instead, we only marked *NEW* (extensive margin) built area, as only this new area would be coded as built in the new time band (as you can see in the code below). 
### by contrast, urban extent files based on the pre-existing Angel et al data are comprehensive of all built area in the city as of that time.

#urban extent at 1883
t1883 <- readOGR("./data_topost/figure5_shapefiles/khartoum1883.shp")
class(t1883)
proj4string(t1883)

#urban extent at 1906
t1906 <- readOGR("./data_topost/figure5_shapefiles/khartoum1906.shp")
class(t1906)
proj4string(t1906)

#urban extent at 1946
t1946 <- readOGR("./data_topost/figure5_shapefiles/khartoum1946.shp")
class(t1946)
proj4string(t1946)

#urban extent at 1956
t1956 <- readOGR("./data_topost/figure5_shapefiles/GreaterKhartoum1956.shp")
class(t1956)
proj4string(t1956)
t1956 <- spTransform(t1956, CRS(" +proj=longlat +datum=WGS84 +no_defs "))

#urban extent at 1965
t1965 <- readOGR("./data_topost/figure5_shapefiles/khartoum1965.shp")
class(t1965)
proj4string(t1965)

#urban extent at 1988 (from Angel et al 2016 LINCOLN ATLAS DATA)
t1988 <- readOGR("./data_topost/figure5_shapefiles/urbFootprint_t1_subset_pruned.shp")
class(t1988)
proj4string(t1988)

#urban extent at 2000 (from Angel et al 2016 LINCOLN ATLAS DATA)
t2000 <- readOGR("./data_topost/figure5_shapefiles/urbFootprint_t2_subset_pruned.shp")
class(t2000)
proj4string(t2000)

#urban extent at 2009
t2009 <- readOGR("./data_topost/figure5_shapefiles/khartoum2009.shp")
class(t2009)
proj4string(t2009)

#urban extent at 2014 (from Angel et al 2016 LINCOLN ATLAS DATA)
t2014 <- readOGR("./data_topost/figure5_shapefiles/urbFootprint_t3_subset_pruned.shp")
class(t2014)
proj4string(t2014)

proj4string(edges)

### code the edges using overlay that fall within each time band

#pre_1883
test <- over(edges, t1883)
length(test)
dim(edges)
head(test)
edges$pre_1883 <- 0
edges$pre_1883[is.na(test)==F] <- 1
table(test)
table(edges$pre_1883)

#bw1883to1906
test <- over(edges, t1906)
dim(test)
dim(edges)
head(test)
edges$bw1883to1906 <- 0
edges$bw1883to1906[is.na(test$id)==F & edges$pre_1883 ==0] <- 1
table(edges$pre_1883, edges$bw1883to1906)

#bw1906to1946
test <- over(edges, t1946)
dim(test)
dim(edges)
head(test)
edges$bw1906to1946 <- 0
edges$bw1906to1946[is.na(test$id)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0] <- 1
table(edges$bw1883to1906, edges$bw1906to1946)

#bw1946to1956
test <- over(edges, t1956)
dim(test)
dim(edges)
head(test)
edges$bw1946to1956 <- 0
edges$bw1946to1956[is.na(test$id)==F  & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0] <- 1
table(edges$bw1906to1946, edges$bw1946to1956)

#bw1956to1965
test <- over(edges, t1965)
dim(test)
dim(edges)
head(test)
edges$bw1956to1965 <- 0
edges$bw1956to1965[is.na(test$id)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0] <- 1
table(edges$bw1946to1956, edges$bw1956to1965)

#bw1965to1988
test <- over(edges, t1988[1:(nrow(t1988)/4),])
#dim(test)
#dim(edges)
#head(test)
edges$bw1965to1988 <- 0
edges$bw1965to1988[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0] <- 1
test <- over(edges, t1988[(nrow(t1988)/4):(nrow(t1988)/2),])
edges$bw1965to1988[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0] <- 1
test <- over(edges, t1988[(nrow(t1988)/2):(3*nrow(t1988)/4),])
edges$bw1965to1988[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0] <- 1
test <- over(edges, t1988[(3*nrow(t1988)/4):nrow(t1988),])
edges$bw1965to1988[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0] <- 1
table(edges$bw1956to1965, edges$bw1965to1988)

#bw1988to2000
test <- over(edges, t2000[1:(nrow(t2000)/4),])
#dim(test)
#dim(edges)
#head(test)
edges$bw1988to2000 <- 0
edges$bw1988to2000[is.na(test$fid)==F  & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0] <- 1
test <- over(edges, t2000[(nrow(t2000)/4):(nrow(t2000)/2),])
edges$bw1988to2000[is.na(test$fid)==F  & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0] <- 1
test <- over(edges, t2000[(nrow(t2000)/2):(3*nrow(t2000)/4),])
edges$bw1988to2000[is.na(test$fid)==F  & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0] <- 1
test <- over(edges, t2000[(3*nrow(t2000)/4):nrow(t2000),])
edges$bw1988to2000[is.na(test$fid)==F  & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0] <- 1
table(edges$bw1965to1988, edges$bw1988to2000)

#bw2000to2009
test <- over(edges, t2009)
dim(test)
dim(edges)
head(test)
edges$bw2000to2009 <- 0
edges$bw2000to2009[is.na(test$id)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0] <- 1
table(edges$bw1988to2000, edges$bw2000to2009)

#bw2009to2014
test <- over(edges, t2014[1:(nrow(t2014)/4),])
edges$bw2009to2014 <- 0
edges$bw2009to2014[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0 & edges$bw2000to2009 ==0] <- 1
test <- over(edges, t2014[(nrow(t2014)/4):(nrow(t2014)/2),])
edges$bw2009to2014[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0 & edges$bw2000to2009 ==0] <- 1
test <- over(edges, t2014[(nrow(t2014)/2):(3*nrow(t2014)/4),])
edges$bw2009to2014[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0 & edges$bw2000to2009 ==0] <- 1
test <- over(edges, t2014[(3*nrow(t2014)/4):(nrow(t2014)),])
edges$bw2009to2014[is.na(test$fid)==F & edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0 & edges$bw2000to2009 ==0] <- 1
table(edges$bw2000to2009, edges$bw2009to2014)


#bw2014to2020
edges$bw2014to2020 <- 0
edges$bw2014to2020[ edges$pre_1883 ==0 & edges$bw1883to1906 ==0 & edges$bw1906to1946 ==0 & edges$bw1946to1956 ==0 & edges$bw1956to1965 ==0 & edges$bw1965to1988 ==0 & edges$bw1988to2000 ==0 & edges$bw2000to2009 ==0 & edges$bw2009to2014 ==0] <- 1



pdf(file="./figs/khartoum_panel1.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1821-1883", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883==1,], col="red", lwd=0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel2.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1883-1906",cex.main=2.1,  col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel3.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1906-1946", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="red", lwd= 0.4, add=T)

dev.off()



pdf(file="./figs/khartoum_panel4.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1946-1956",cex.main=2.1,  col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel5.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1956-1965", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel6.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1965-1988", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1965to1988 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel7.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="1988-2000", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1965to1988 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1988to2000 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel8.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="2000-2009", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1965to1988 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1988to2000 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2000to2009 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel9.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="2009-2014", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1965to1988 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1988to2000 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2000to2009 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2009to2014 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


pdf(file="./figs/khartoum_panel10.pdf", height=8, width=8)

plot(bbox(edges)[1,], bbox(edges)[2,], main="2014-2020", cex.main=2.1, col.main="red", col="white", xaxt="n", xlab="", yaxt="n", ylab="", bty="n")
plot(edges[edges$pre_1883 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1883to1906 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1906to1946 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1946to1956 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1956to1965 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1965to1988 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw1988to2000 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2000to2009 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2009to2014 ==1,], col="grey59", lwd= 0.4, add=T)
plot(edges[edges$bw2014to2020 ==1,], col="red", lwd= 0.4, add=T)

dev.off()


########################################
#### I concentrate on 36 cities (listed in Figure 3), selected by combining the 20 African cities in Angel et al.’s (2016) global random sample of 200 major cities, for which they provide detailed over time satellite data on urban footprints, with an additionally randomly selected 16 cities above a similar size cutoff
########################################


d$in_lincoln_atlas <- 0
d$in_lincoln_atlas[d$core_city %in% c("accra", "addis_ababa", "arusha", "bamako", "beira", "gombe", "ibadan", "johannesburg", "kampala", "khartoum", "kigali", "kinshasa", "lagos", "luanda", "lubumbashi", "nairobi", "nakuru", "ndola", "oyo", "port_elizabeth")] <- 1
table(d$in_lincoln_atlas)


table(d$country)
#calculate the largest *2* cities in each country: to increase odds we have historical materials on them
new2 <- d[d$core_city=="accra",]
for(i in 1:length(unique(d$country))){
	new <- d[d$country == unique(d$country)[i],]
	new <- new[order(new$resident_pop, decreasing=T),]
	if(nrow(new)>=3){
	new2 <- rbind(new2, new[1:2,])
	} else{
	new2 <- rbind(new2, new)
	}
}
dim(new2)
#83 (accra is double counted) cities that are 1st or 2nd largest in their country in the sample. 
#top 2 largest cities OR the national capital -- that's going to be your exclusion restriction for this subsample

d$eligible_for_sample <- 0 
d$eligible_for_sample[(d$uc_id %in% new2$uc_id | d$nat_capital_currently==1) & d$in_lincoln_atlas==0] <- 1 
	#sample excludes the ones already in the lincoln atlas
table(d$eligible_for_sample)

	#stratify by whether it was a SETTLER COLONY *AND* HOW ORDERLY IT IS OVERALL (compared to the median of D)
d2 <- d[d$eligible_for_sample==1,]
d2 $strata <- ifelse(d2 $settler_colony ==1 & d2 $orientation_order > median(d$orientation_order), 1, NA)
	#this first strata doesn't really exist
d2 $strata <- ifelse(d2 $settler_colony ==1 & d2 $orientation_order <= median(d$orientation_order), 2, d2$strata)
d2 $strata <- ifelse(d2 $settler_colony ==0 & d2 $orientation_order > median(d$orientation_order), 3, d2$strata)
d2 $strata <- ifelse(d2 $settler_colony ==0 & d2 $orientation_order <= median(d$orientation_order), 4, d2$strata)
table(d2 $strata)

round((table(d2$strata) / nrow(d2)) * 16, digits=0)
#1 2 3 4 
#0 3 5 8 

set.seed(48103)
samp1 <- sample(d2 $uc_id[d2 $strata==1], round((table(d2$strata) / nrow(d2)) * 16, digits=0)[1], replace=F)
samp2 <- sample(d2 $uc_id[d2 $strata==2], round((table(d2 $strata) / nrow(d2)) * 16, digits=0)[2], replace=F)
samp3 <- sample(d2 $uc_id[d2 $strata==3], round((table(d2 $strata) / nrow(d2)) * 16, digits=0)[3], replace=F)
samp4 <- sample(d2 $uc_id[d2 $strata==4], round((table(d2 $strata) / nrow(d2)) * 16, digits=0)[4], replace=F)

d$band_sample <- 0
d$band_sample[d$in_lincoln_atlas==1 | d$uc_id %in% c(samp1, samp2, samp3, samp4)] <- 1
table(d$band_sample)

sort(d$core_city[d$band_sample==1]) 

band_sample <- d[d$band_sample==1,]
band_sample <- band_sample[order(band_sample$core_city),]
band_sample$core_city
	#these are the sampled cities in the paper

########################################
#### subsetting to the 5,525 neighborhoods (73% of total) built after independence in each city
########################################

sub1 <- main2[main2$colrule == 0 & main2$average_age >= 1950,]
dim(sub1)


########################################
#### Table 1: Neighborhood-level griddedness (O) and regime type, post-colonial period
########################################

sub1 $auto_med <- 0
sub1 $auto_med[sub1 $v2x_polyarchy <= median(sub1$v2x_polyarchy)] <- 1 
median(sub1$v2x_polyarchy)

##Panel A: FE versions (no pooling)
fe1 <- lm(orientation_order ~ average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe2 <- lm(orientation_order ~ v2x_polyarchy + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe3 <- lm(orientation_order ~ v2x_polyarchy + average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe4 <- lm(orientation_order ~ auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe5 <- lm(orientation_order ~ auto_med + average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2$residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe3, cluster=c("country"))$coefficients, digits=3)
length(fe3$residual)
round(summary(fe3, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1$residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe4, cluster=c("country"))$coefficients, digits=3)
length(fe4$residual)
round(summary(fe4, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe5, cluster=c("country"))$coefficients, digits=3)
length(fe5$residual)
round(summary(fe5, cluster=c("country"))$adj.r.squared, digits=3)

##Panel B: glmer versions: (partial pooling)
lmer1 <- lmer(orientation_order ~ average_age  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)

lmer2 <- lmer(orientation_order ~ v2x_polyarchy  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)

lmer3 <- lmer(orientation_order ~ v2x_polyarchy  + average_age  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)

lmer4 <- lmer(orientation_order ~ auto_med  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)

lmer5 <- lmer(orientation_order ~ auto_med  + average_age +  node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony + (1 | city_fixed), data= sub1)

round(summary(lmer2)$coefficients, digits=3)
length(summary(lmer2)$residuals)

round(summary(lmer3)$coefficients, digits=3)
length(summary(lmer3)$residuals)

round(summary(lmer1)$coefficients, digits=3)
length(summary(lmer1)$residuals)

round(summary(lmer4)$coefficients, digits=3)
length(summary(lmer4)$residuals)

round(summary(lmer5)$coefficients, digits=3)
length(summary(lmer5)$residuals)

##Panel C: full pooling: no HLM / no FEs
lm1 <- lm(orientation_order ~ average_age  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)

lm2 <- lm(orientation_order ~ v2x_polyarchy  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)

lm3 <- lm(orientation_order ~ v2x_polyarchy  + average_age  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)

lm4 <- lm(orientation_order ~ auto_med  + node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)

lm5 <- lm(orientation_order ~ auto_med  + average_age +  node_count + elev_median + elev_std + city_elev_std + nat_capital_historically + ocean_coast + border_city + rain_mm + soil_quality_FAO + precolonial_major_city + colonial_power_britain + colonial_power_france + colonial_power_none + settler_colony, data= sub1)

round(summary(lm2, cluster=c("country"))$coefficients, digits=3)
length(lm2 $residual)
round(summary(lm2, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(lm3, cluster=c("country"))$coefficients, digits=3)
length(lm3 $residual)
round(summary(lm3, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(lm1, cluster=c("country"))$coefficients, digits=3)
length(lm1 $residual)
round(summary(lm1, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(lm4, cluster=c("country"))$coefficients, digits=3)
length(lm4 $residual)
round(summary(lm4, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(lm5, cluster=c("country"))$coefficients, digits=3)
length(lm5 $residual)
round(summary(lm5, cluster=c("country"))$adj.r.squared, digits=3)



########################################
#### Table 2: Neighborhood-level griddedness (O) and state capacity, post-colonial period
########################################


sub1$e_migdppcA <- sub1$e_migdppc/1000
#scale it in 1000s for readability 

sub1$e_migdppc_growthA <- sub1$e_migdppc_growth/1000
#scale it in 1000s for readability

fe8 <- lm(orientation_order ~ sh_capacity + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe9 <- lm(orientation_order ~ (sh_capacity * auto_med) + sh_capacity + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe1 <- lm(orientation_order ~ e_migdppcA + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe2 <- lm(orientation_order ~ (e_migdppcA * auto_med) + e_migdppcA + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe3 <- lm(orientation_order ~ e_migdppc_growthA + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe4 <- lm(orientation_order ~ (e_migdppc_growthA * auto_med) + e_migdppc_growthA + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe5 <- lm(orientation_order ~ after_IMF_sa + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

fe6 <- lm(orientation_order ~ (after_IMF_sa * auto_med) + after_IMF_sa + auto_med + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1)

round(summary(fe8, cluster=c("country"))$coefficients, digits=3)
length(fe8$residual)
round(summary(fe8, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe9, cluster=c("country"))$coefficients, digits=3)
length(fe9$residual)
round(summary(fe9, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe1, cluster=c("country"))$coefficients, digits=3)
length(fe1$residual)
round(summary(fe1, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe2, cluster=c("country"))$coefficients, digits=3)
length(fe2$residual)
round(summary(fe2, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe3, cluster=c("country"))$coefficients, digits=3)
length(fe3$residual)
round(summary(fe3, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe4, cluster=c("country"))$coefficients, digits=3)
length(fe4$residual)
round(summary(fe4, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe5, cluster=c("country"))$coefficients, digits=3)
length(fe5$residual)
round(summary(fe5, cluster=c("country"))$adj.r.squared, digits=3)

round(summary(fe6, cluster=c("country"))$coefficients, digits=3)
length(fe6$residual)
round(summary(fe6, cluster=c("country"))$adj.r.squared, digits=3)



########################################
#### Assorted quantities extracted from the state housing dataset
########################################

td2 <- rbind(timedat.abuja[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.accra[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.addis_ababa[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.bamako[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.bujumbura[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.cape_town[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.gombe[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.ibadan[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.johannesburg[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.kampala[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.khartoum[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.kinshasa[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.lagos[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.luanda[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.lubumbashi[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.maseru[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.monrovia[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.ndola[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.oyo[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.pointe_noire[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.port_elizabeth[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")], 
timedat.porto_novo[,c("city_fixed", "year", "v2exl_legitideolcr_1", "gwf_democracy_extended")] )

#drop the 2 cities that are all NAs:
base <- base[!base$city %in% c("lubumbashi-3756","oyo-2191"),]
dim(base)

base$housing_under_high_autocracy <- 0
base$prop_high_autocracy_w_housing <- NA
base$prop_all_autocracy_w_housing <- NA
base$prop_democracy_w_housing <- NA
#average polyarchy score during years that housing is being constructed
base$avg_polyrachy_duringhousing <- NA
base$avg_polyarchy_overall <- NA
base$avg_polyarchy_diff <- NA
#of housing that is constructued under HIGH autocracy, what proportion targets middle class / elites: 
base$prop_main_targets_HA_mc_cs_elites <- NA

for(i in 1:nrow(base)){
	d1 <- housing[housing$city==base$city[i],]
	td2a <- td2[td2$city_fixed==base$city[i],]
	
	base$housing_under_high_autocracy[i] <- ifelse( max(d1$state_housing_any[d1$v2x_polyarchy<=0.232])==1, 1, 0)
	
	base$prop_high_autocracy_w_housing[i] <- mean(d1$state_housing_any[d1$v2x_polyarchy<=0.232])
	
	base$prop_all_autocracy_w_housing[i] <- mean(d1$state_housing_any[d1$v2x_polyarchy < 0.5])
	
	base$prop_democracy_w_housing[i] <- mean(d1$state_housing_any[d1$v2x_polyarchy >= 0.5])

	base$avg_polyrachy_duringhousing[i] <-  mean(d1[d1$state_housing_any==1,"v2x_polyarchy"])
	
	base$avg_polyarchy_overall[i] <- mean(d1$v2x_polyarchy)

	base$avg_polyarchy_diff[i] <- base$avg_polyrachy_duringhousing[i] - base$avg_polyarchy_overall[i]

	#of housing that is constructed, what proportion targets middle class / elites: 
	d1$main_targets_mc <- 0
	d1$main_targets_mc[d1$primary_recipient_income %in% c("high income", "middle income", "middle to high income")] <- 1
	d1$main_targets_cs <- 0
	d1$main_targets_cs[d1$targets_civilservants_orelites=="yes"] <- 1
	d1$both <- d1$main_targets_mc + d1$main_targets_cs
	
base$prop_main_targets_HA_mc_cs_elites[i] <- mean(d1$both[d1$state_housing_any==1 & d1$v2x_polyarchy<=0.232] > 0)

}


### "75% of these 20 cities had state housing provision during a period they were coded as a high autocracy (polyarchy ≤ 0.23)"

mean(base$housing_under_high_autocracy)
#0.75

### "housing programs were active, on average, in 41% of the total city-years under high autocracy"

mean(base$prop_high_autocracy_w_housing, na.rm=TRUE)
#0.411698

### "Civil servants, employees of parastatal companies, or other middle to high income formal sector workers were identified as the primary recipients of 71% of projects in periods of high autocracy"

mean(base$prop_main_targets_HA_mc_cs_elites, na.rm=TRUE)
#0.7088023

### "Only 29% of years with polyarchy scores above 0.5, a standard cutoff for full democracies..." had state housing projects

mean(base$prop_democracy_w_housing, na.rm=TRUE)
#0.2908448

#### "Without South Africa, only 9% of city-years with polyarchy above 0.5 have state provision, compared to 34% of city-years with polyarchy below 0."
mean(base$prop_democracy_w_housing[!base$city %in% c("cape_town-3268", "johannesburg-3673", "port_elizabeth-3505") ], na.rm=TRUE)
#0.0857906

mean(base$prop_all_autocracy_w_housing[!base$city %in% c("cape_town-3268", "johannesburg-3673", "port_elizabeth-3505") ], na.rm=TRUE)
#0.3442362

#### In the fixed effect model from column 5 of Panel (a) of Table 1, this new variable predicts griddedness, with p=0.08, with neighborhoods built at a time the state was fully active in housing construction scoring 4.1 p.p. (95% CI: -0.5, 8.6) higher on orientation order (O) than neighborhoods built when housing provision was inactive

fe5 <- lm(orientation_order ~ state_housing_any + auto_med + average_age + node_count + elev_median + elev_std + as.factor(city_fixed), data= sub1[is.na(sub1$state_housing_any)==F,] )
summary(fe5, cluster=c("country"))
#state_housing_any                         4.059e-02  2.315e-02   1.754 0.079527 . 

summary(fe5, cluster=c("country"))$coefficients[2,1]
#0.04059332
summary(fe5, cluster=c("country"))$coefficients[2,1] + 1.96*summary(fe5, cluster=c("country"))$coefficients[2,2]
#0.08595943
summary(fe5, cluster=c("country"))$coefficients[2,1] - 1.96*summary(fe5, cluster=c("country"))$coefficients[2,2]
#-0.004772789

#### among the cities with positive autocracy-griddedndess relationships when estimating column 4 of panel (a) of Table 1 separately by city, the average polyarchy score in years with state housing provision is 7.3 p.p. lower than in years without it... in the smaller set of deviant cases in which there is instead a negative autocracy-griddedness relationship, the relationship between autocracy and housing also flips; the average polyarchy score in years with housing is instead 8.1 p.p. higher than in years without

mean(base$avg_polyarchy_diff[base$within_case_pattern=="positive"], na.rm=TRUE)
#-0.07328606

mean(base$avg_polyarchy_diff[base$within_case_pattern=="negative"], na.rm=TRUE)
#0.08097


########################################
#### Figure 7: Interaction of high autocracy and state housing provision
########################################

interdata <- sub1[is.na(sub1$state_housing_any)==F ,]
interdata$accra <- ifelse(interdata$city_fixed=="accra-1910", 1, 0)
interdata$addis_ababa <- ifelse(interdata$city_fixed=="addis_ababa-5134", 1, 0) 
interdata$bamako <- ifelse(interdata$city_fixed=="bamako-1553", 1, 0)
interdata$bujumbura <- ifelse(interdata$city_fixed=="bujumbura-4078", 1, 0)
interdata$cape_town <- ifelse(interdata$city_fixed=="cape_town-3268", 1, 0)
interdata$gombe <- ifelse(interdata$city_fixed=="gombe-2932", 1, 0)
interdata$ibadan <- ifelse(interdata$city_fixed=="ibadan-2189", 1, 0)
interdata$johannesburg <- ifelse(interdata$city_fixed=="johannesburg-3673", 1, 0)
interdata$kampala <- ifelse(interdata$city_fixed=="kampala-4427", 1, 0)
interdata$khartoum <- ifelse(interdata$city_fixed=="khartoum-4335", 1, 0)
interdata$kinshasa <- ifelse(interdata$city_fixed=="kinshasa-3209", 1, 0)
interdata$lagos <- ifelse(interdata$city_fixed=="lagos-2125", 1, 0)
interdata$luanda <- ifelse(interdata$city_fixed=="luanda-3050", 1, 0)
interdata$maseru <- ifelse(interdata$city_fixed=="maseru-3631", 1, 0)
interdata$monrovia <- ifelse(interdata$city_fixed=="monrovia-1526", 1, 0)
interdata$ndola <- ifelse(interdata$city_fixed=="ndola-3859", 1, 0)
interdata$pointe_noire <- ifelse(interdata$city_fixed=="pointe_noire-2982", 1, 0)
interdata$port_elizabeth <- ifelse(interdata$city_fixed=="port_elizabeth-3505", 1, 0)
interdata$porto_novo <- ifelse(interdata$city_fixed=="porto_novo-2101", 1, 0)

interdata$int <- interdata$auto_med * interdata$state_housing_any

fe5 <- lm(orientation_order ~ (auto_med * state_housing_any) + state_housing_any + auto_med + average_age + node_count + elev_median + elev_std + accra + addis_ababa + bamako + bujumbura + cape_town + gombe + ibadan + johannesburg + kampala + khartoum + kinshasa + lagos + luanda + maseru + monrovia + ndola + pointe_noire + port_elizabeth + porto_novo, data= interdata )

vars <- c("auto_med", "state_housing_any", "average_age", "node_count", "elev_median", "elev_std", "accra", "addis_ababa", "bamako", "bujumbura", "cape_town", "gombe", "ibadan", "johannesburg", "kampala", "khartoum", "kinshasa", "lagos", "luanda", "maseru", "monrovia", "ndola", "pointe_noire", "port_elizabeth", "porto_novo", "int", "country")
correctorder_covars <- interdata[,vars]
w.dist<-cbind(interdata[,c("orientation_order", colnames(correctorder_covars))])
w.dist<-na.omit(w.dist)
dist.fan<-factor(w.dist[,ncol(w.dist)])
mat <- estfun(fe5)
N <- nrow(mat)
u <- apply(mat, 2, function(x) tapply(x, dist.fan, sum))
u <- na.omit(u)
K<-fe5 $rank
df <- (length(unique(w.dist[,ncol(w.dist)])) / (length(unique(w.dist[,ncol(w.dist)])) - 1))*((N - 1)/ (N - K))
vcovCL <- df*sandwich(fe5, meat=crossprod(u)/N)
fe5.final<-coeftest(fe5, vcovCL) 
fe5.final 

set.seed(02142)
	N <- 1000
	betas <- mvrnorm(n = N, mu=fe5.final[,1], vcovCL)
	per <- seq(0,1, by=0.01)

holder <- as.data.frame(matrix(NA, nrow=length(per), ncol=3))
colnames(holder) <- c("est", "low", "high")

for(i in 1:length(per)){
	
	x1 <- cbind(1, 1, per[i], interdata$average_age,interdata$node_count,interdata$elev_median,interdata$elev_std,interdata$accra,interdata$addis_ababa,interdata$bamako,interdata$bujumbura,interdata$cape_town,interdata$gombe,interdata$ibadan,interdata$johannesburg,interdata$kampala,interdata$khartoum,interdata$kinshasa,interdata$lagos, interdata$luanda, interdata$maseru,  interdata$monrovia, interdata$ndola, interdata$pointe_noire, interdata$port_elizabeth, interdata$porto_novo, 1* per[i])
	c1 <- cbind(1, 0, per[i], interdata$average_age, interdata$node_count,interdata$elev_median,interdata$elev_std,interdata$accra,interdata$addis_ababa,interdata$bamako,interdata$bujumbura,interdata$cape_town,interdata$gombe,interdata$ibadan,interdata$johannesburg,interdata$kampala,interdata$khartoum,interdata$kinshasa,interdata$lagos, interdata$luanda, interdata$maseru,  interdata$monrovia, interdata$ndola, interdata$pointe_noire, interdata$port_elizabeth, interdata$porto_novo, 0)

	tpred1 <- x1 %*% t(betas) #for each of the 1000 permutations of beta, the predicted value for each observation
	cpred1 <- c1 %*% t(betas) #for each of the 1000 permutations of beta, the predicted value for each observation
	diffpred1 <- apply(tpred1 - cpred1, 2, mean)
	
	holder[i,"est"] <- mean(diffpred1)
	holder[i,"low"] <- quantile(diffpred1, .025)
	holder[i,"high"] <- quantile(diffpred1, .975)

}

pdf(file="./figs/figure7.pdf", height=6, width=6) 
plot(per, holder$est, pch=16, col="white", cex=.7, xlab="proportion years with state housing\nwhen neighborhood is built", ylab="coefficient on High Autocracy", ylim=c(-0.1, 0.11), main="Interaction of autocracy and state housing", cex.main = .9)
abline(h=0, col="firebrick")
segments(per, holder[,"low"], per, holder[,"high"], col="gray72")
points(per, holder[,"est"], pch=16, col="dodgerblue2", cex=0.8)
rug(interdata$state_housing_any)
dev.off()
